Checking the model with the superclass hierarchy, with no augmentation. (Augmentation was manually switched off in the .json file.)
In [9]:
cd ..
Run a modified version of check_test_score.py so that it works with the superclass representation.
In [1]:
import numpy as np
import pylearn2.utils
import pylearn2.config
import theano
import neukrill_net.dense_dataset
import neukrill_net.utils
import sklearn.metrics
import argparse
import os
import pylearn2.config.yaml_parse
Check which GPU is free and point Theano at it.
In [6]:
%env THEANO_FLAGS=device=gpu3,floatX=float32,base_compiledir=~/.theano/stonesoup3
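As an aside (not part of the original run), one quick way to see which GPUs are busy is to query nvidia-smi from inside the notebook:

!nvidia-smi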
In [31]:
verbose = False
augment = 1
settings = neukrill_net.utils.Settings("settings.json")
Give the path to the run settings .json.
In [14]:
run_settings = neukrill_net.utils.load_run_settings('run_settings/alexnet_based_extra_convlayer_with_superclasses.json',
settings, force=True)
In [49]:
model = pylearn2.utils.serial.load(run_settings['pickle abspath'])
In [50]:
# format the YAML
yaml_string = neukrill_net.utils.format_yaml(run_settings, settings)
# load proxied objects
proxied = pylearn2.config.yaml_parse.load(yaml_string, instantiate=False)
# pull out proxied dataset
proxdata = proxied.keywords['dataset']
# force loading of dataset and switch to test dataset
proxdata.keywords['force'] = True
proxdata.keywords['training_set_mode'] = 'test'
proxdata.keywords['verbose'] = False
# then instantiate the dataset
dataset = pylearn2.config.yaml_parse._instantiate(proxdata)
In [51]:
if hasattr(dataset.X, 'shape'):
    N_examples = dataset.X.shape[0]
else:
    N_examples = len(dataset.X)
batch_size = 500
while N_examples % batch_size != 0:
    batch_size += 1
n_batches = int(N_examples/batch_size)
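As a worked example of what this loop does (with a made-up example count): it finds the smallest batch size at or above 500 that divides the number of test examples exactly, so the sequential iterator covers every example with no remainder.

# illustrative only, with a hypothetical N_examples
N_examples = 3030
batch_size = 500
while N_examples % batch_size != 0:
    batch_size += 1
print(batch_size, N_examples // batch_size)  # -> 505 6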
In [52]:
model.set_batch_size(batch_size)
X = model.get_input_space().make_batch_theano()
Y = model.fprop(X)
f = theano.function([X],Y)
In [53]:
import neukrill_net.encoding as enc
hier = enc.get_hierarchy()
lengths = sum([len(array) for array in hier])
y = np.zeros((N_examples*augment,lengths))
# get the data specs from the cost function using the model
pcost = proxied.keywords['algorithm'].keywords['cost']
cost = pylearn2.config.yaml_parse._instantiate(pcost)
data_specs = cost.get_data_specs(model)
In [54]:
i = 0
for _ in range(augment):
    # make sequential iterator
    iterator = dataset.iterator(batch_size=batch_size, num_batches=n_batches,
                                mode='even_sequential', data_specs=data_specs)
    for batch in iterator:
        if verbose:
            print(" Batch {0} of {1}".format(i+1, n_batches*augment))
        y[i*batch_size:(i+1)*batch_size, :] = f(batch[0])
        i += 1
The best .pkl scores as follows:
In [47]:
logloss = sklearn.metrics.log_loss(dataset.y[:, :len(settings.classes)], y[:, :len(settings.classes)])
print("Log loss: {0}".format(logloss))
The most recent .pkl scores as follows (rerun the relevant cells with a different path):
In [55]:
logloss = sklearn.metrics.log_loss(dataset.y[:, :len(settings.classes)], y[:, :len(settings.classes)])
print("Log loss: {0}".format(logloss))
In [62]:
%env THEANO_FLAGS = device=gpu2,floatX=float32,base_compiledir=~/.theano/stonesoup2
In [63]:
%env
Check the same model, this time trained with 8x augmentation.
In [64]:
import numpy as np
import pylearn2.utils
import pylearn2.config
import theano
import neukrill_net.dense_dataset
import neukrill_net.utils
import sklearn.metrics
import argparse
import os
import pylearn2.config.yaml_parse
verbose = False
augment = 1
settings = neukrill_net.utils.Settings("settings.json")
run_settings = neukrill_net.utils.load_run_settings('run_settings/alexnet_based_extra_convlayer_with_superclasses_aug.json',
settings, force=True)
model = pylearn2.utils.serial.load(run_settings['pickle abspath'])
# format the YAML
yaml_string = neukrill_net.utils.format_yaml(run_settings, settings)
# load proxied objects
proxied = pylearn2.config.yaml_parse.load(yaml_string, instantiate=False)
# pull out proxied dataset
proxdata = proxied.keywords['dataset']
# force loading of dataset and switch to test dataset
proxdata.keywords['force'] = True
proxdata.keywords['training_set_mode'] = 'test'
proxdata.keywords['verbose'] = False
# then instantiate the dataset
dataset = pylearn2.config.yaml_parse._instantiate(proxdata)
if hasattr(dataset.X, 'shape'):
    N_examples = dataset.X.shape[0]
else:
    N_examples = len(dataset.X)
batch_size = 500
while N_examples % batch_size != 0:
    batch_size += 1
n_batches = int(N_examples/batch_size)
model.set_batch_size(batch_size)
X = model.get_input_space().make_batch_theano()
Y = model.fprop(X)
f = theano.function([X],Y)
import neukrill_net.encoding as enc
hier = enc.get_hierarchy()
lengths = sum([len(array) for array in hier])
y = np.zeros((N_examples*augment,lengths))
# get the data specs from the cost function using the model
pcost = proxied.keywords['algorithm'].keywords['cost']
cost = pylearn2.config.yaml_parse._instantiate(pcost)
data_specs = cost.get_data_specs(model)
i = 0
for _ in range(augment):
    # make sequential iterator
    iterator = dataset.iterator(batch_size=batch_size, num_batches=n_batches,
                                mode='even_sequential', data_specs=data_specs)
    for batch in iterator:
        if verbose:
            print(" Batch {0} of {1}".format(i+1, n_batches*augment))
        y[i*batch_size:(i+1)*batch_size, :] = f(batch[0])
        i += 1
The best .pkl scored as follows:
In [65]:
logloss = sklearn.metrics.log_loss(dataset.y[:, :len(settings.classes)], y[:, :len(settings.classes)])
print("Log loss: {0}".format(logloss))
Strange: not as good as we hoped. Is there a problem with the augmentation?
Let's plot the NLL.
In [67]:
import pylearn2.utils
import pylearn2.config
import theano
import neukrill_net.dense_dataset
import neukrill_net.utils
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
#import holoviews as hl
#load_ext holoviews.ipython
import sklearn.metrics
In [73]:
m = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/alexnet_based_extra_convlayer_with_superclasses_aug_recent.pkl")
In [75]:
channel = m.monitor.channels["valid_y_y_1_nll"]
plt.plot(channel.example_record,channel.val_record)
Out[75]: [plot: valid_y_y_1_nll vs. examples seen]
It looks like the NLL is pretty stable around 4, apart from one random glitch that happened to give the best result.
Look at the best .pkl of the non-augmented model again, just to confirm that it was indeed good:
In [76]:
import numpy as np
import pylearn2.utils
import pylearn2.config
import theano
import neukrill_net.dense_dataset
import neukrill_net.utils
import sklearn.metrics
import argparse
import os
import pylearn2.config.yaml_parse
verbose = False
augment = 1
settings = neukrill_net.utils.Settings("settings.json")
run_settings = neukrill_net.utils.load_run_settings('run_settings/alexnet_based_extra_convlayer_with_superclasses.json',
settings, force=True)
model = pylearn2.utils.serial.load(run_settings['pickle abspath'])
# format the YAML
yaml_string = neukrill_net.utils.format_yaml(run_settings, settings)
# load proxied objects
proxied = pylearn2.config.yaml_parse.load(yaml_string, instantiate=False)
# pull out proxied dataset
proxdata = proxied.keywords['dataset']
# force loading of dataset and switch to test dataset
proxdata.keywords['force'] = True
proxdata.keywords['training_set_mode'] = 'test'
proxdata.keywords['verbose'] = False
# then instantiate the dataset
dataset = pylearn2.config.yaml_parse._instantiate(proxdata)
if hasattr(dataset.X, 'shape'):
    N_examples = dataset.X.shape[0]
else:
    N_examples = len(dataset.X)
batch_size = 500
while N_examples % batch_size != 0:
    batch_size += 1
n_batches = int(N_examples/batch_size)
model.set_batch_size(batch_size)
X = model.get_input_space().make_batch_theano()
Y = model.fprop(X)
f = theano.function([X],Y)
import neukrill_net.encoding as enc
hier = enc.get_hierarchy()
lengths = sum([len(array) for array in hier])
y = np.zeros((N_examples*augment,lengths))
# get the data specs from the cost function using the model
pcost = proxied.keywords['algorithm'].keywords['cost']
cost = pylearn2.config.yaml_parse._instantiate(pcost)
data_specs = cost.get_data_specs(model)
i = 0
for _ in range(augment):
    # make sequential iterator
    iterator = dataset.iterator(batch_size=batch_size, num_batches=n_batches,
                                mode='even_sequential', data_specs=data_specs)
    for batch in iterator:
        if verbose:
            print(" Batch {0} of {1}".format(i+1, n_batches*augment))
        y[i*batch_size:(i+1)*batch_size, :] = f(batch[0])
        i += 1
In [77]:
logloss = sklearn.metrics.log_loss(dataset.y[:, :len(settings.classes)], y[:, :len(settings.classes)])
print("Log loss: {0}".format(logloss))
It was. Annoying. Let's plot the NLL too:
In [78]:
m = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/alexnet_based_extra_convlayer_with_superclasses.pkl")
In [79]:
channel = m.monitor.channels["valid_y_y_1_nll"]
plt.plot(channel.example_record,channel.val_record)
Out[79]: [plot: valid_y_y_1_nll vs. examples seen]
That's way nicer. Looks like it was working fine.
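To compare the two runs directly, one could overlay both monitoring channels (a sketch, reusing the two model paths loaded above):

m_noaug = pylearn2.utils.serial.load(
    "/disk/scratch/neuroglycerin/models/alexnet_based_extra_convlayer_with_superclasses.pkl")
m_aug = pylearn2.utils.serial.load(
    "/disk/scratch/neuroglycerin/models/alexnet_based_extra_convlayer_with_superclasses_aug_recent.pkl")
for model_, label in [(m_noaug, "no augmentation"), (m_aug, "8x augmentation")]:
    channel = model_.monitor.channels["valid_y_y_1_nll"]
    plt.plot(channel.example_record, channel.val_record, label=label)
plt.xlabel("examples seen")
plt.ylabel("valid_y_y_1_nll")
plt.legend()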
Now we are going to modify the dense dataset class so that, for each image, the exact same image is produced. This way we mimic the augmentation pipeline but effectively run on exactly the same dataset, and we can reuse the same .json and .yaml files.
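A minimal sketch of the idea (hypothetical names; the real change goes into neukrill_net.dense_dataset, whose API may differ): replace the random augmentation functions with a deterministic identity transform, so every "augmented" copy is the unmodified original image.

# hypothetical sketch -- not the actual neukrill_net API
def identity_augment(image):
    # stand-in 'augmentation' that returns the image unchanged
    return image

# wherever the dataset builds its list of augmentation functions,
# substitute identity transforms so the pipeline runs exactly as in
# the augmented case but every copy equals the source image:
# augment_fns = [identity_augment] * n_augmentations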